import pyarrow.parquet as pq
import os

# Path of the .gz.parquet file to read.
file_path = 's3://news2.lukiebuy.com/db/rds-2024-04-07/sm-station/sm-station.sm_shopify_store/1/part-00000-62c35042-833e-46f4-9b4b-31c041650ac5-c000.gz.parquet'

# The file lives on S3, so AWS credentials are required. They must be supplied
# from OUTSIDE this source file, e.g. by exporting AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY in the shell before running the script, or through
# another standard AWS credential source (shared credentials file, IAM role).
#
# SECURITY: an access key pair used to be hard-coded at this point. Anything
# committed to source control must be treated as compromised: revoke that key
# pair in IAM, issue a new one, and never write secrets into the script.

# Load the Parquet file into an in-memory pyarrow Table.
table = pq.read_table(file_path)

# Dump the columns of interest to stdout for a quick visual check.
# NOTE(review): "pixel_access_token" looks like a credential column - confirm
# that printing it to the console/logs is acceptable.
for column_name in ("id", "pixel_ids", "pixel_access_token"):
    print(table[column_name])

# Example queries (left disabled), e.g. selecting specific columns:
#selected_columns = table.select(['pixel_ids', 'pixel_access_token'])

# ... or keeping only the rows that satisfy a condition
# (needs `import pyarrow.compute as pc`):
#filtered_table = table.filter(pc.greater(table['id'], 0))

# Convert the result to a pandas DataFrame for analysis.
# `pd` is not referenced directly below, but Table.to_pandas() needs pandas
# to be installed.
import pandas as pd

df = table.to_pandas()

# Perform your data analysis here...

outfile = 'f:\\lktime.csv'
# Plain 'gbk' raises UnicodeEncodeError as soon as a value contains a
# character outside the GBK repertoire (emoji, other scripts, ...).
# GB18030 is designed as a superset of GBK that can encode every Unicode
# code point, so GBK-range text is written as before while the export no
# longer aborts on other characters.
df.to_csv(outfile, encoding='gb18030')
